1. Data Wrangling and Initial Investigation

Importing and Cleaning Datasets

# Path to the data folder (trailing slash kept so paste0() below forms full paths)
path <- paste0(getwd(), '/data/')

# Importing csv files

# Anchored regex: match names *ending* in ".csv". The previous "*.csv" was a
# shell glob, not a regex -- as a regex it is unanchored and `.` matches any
# character, so it could also match names like "x_csv_backup.txt".
filenames <- list.files(path = path, pattern = "\\.csv$")

# Load each csv into a data frame named after the file (extension stripped)
for (i in filenames) {
  filepath <- paste0(path, i)
  name <- substr(i, 1, nchar(i) - 4)  # drop the 4-character ".csv" suffix
  assign(name, read.csv(filepath))
}

# Importing store_restaurant separately because it is not a csv file
store_restaurant <- read_xlsx(paste0(path, 'store_restaurant.xlsx'))

Cleaning Ingredients column:

Checking how many “lettuce” observations we have:

# All ingredient records whose name contains "Lettuce"
lettuce <- filter(ingredients, str_detect(IngredientName, 'Lettuce'))
kable(lettuce)
IngredientName IngredientShortDescription IngredientId PortionUOMTypeId
Lettuce Lettuce 27 15
Lettuce - Metric Lettuce - Metric 291 13

Merging Tables

# Table1: menu items sold within each transaction.
# Keys: transaction id + store + date; suffixes disambiguate shared columns
# from the transaction (.T) and menu item (.M) sides.
table1 <- inner_join(pos_ordersale, menuitem, 
                              by = c('MD5KEY_ORDERSALE', 'StoreNumber', 'date'), suffix=c('.T', '.M'))

# Table2: joining with menu_items to get equivalent recipe id for each menu item.
# Flattened `by` spec -- identical named vector to the previous nested c() form.
table2 <- inner_join(table1, menu_items, by = c("PLU", "Id" = "MenuItemId"))

# Table3: joining with sub-recipes (left join keeps recipes with no sub-recipe)
table3 <- left_join(table2, recipe_sub_recipe_assignments, by = 'RecipeId')

# Table4: adding ingredient assignment tables for recipes (.R) and subrecipes (.S)
table4 <- left_join(table3, recipe_ingredient_assignments, by = 'RecipeId', suffix = c(".M", ".R"))
table4 <- left_join(table4, sub_recipe_ingr_assignments, by = 'SubRecipeId', suffix = c(".R", ".S"))

# The sub-recipe Quantity column arrives unsuffixed (the earlier Quantity
# columns were already suffixed in prior joins, so no name collision occurred);
# rename it for consistency with IngredientId.S.
table4 <- rename(table4, Quantity.S = Quantity)

# Table5: keep only rows involving lettuce (id 27 = ounces, id 291 = metric),
# whether it enters through the recipe (.R) or the sub-recipe (.S) side
table5 <- table4 %>% filter(IngredientId.R %in% c(27, 291) | IngredientId.S %in% c(27, 291))

Checking if there are any occurrences where lettuce appears as an ingredient of both the recipe and sub-recipe:

nrow(table5 %>% filter(IngredientId.R %in% c(27, 291) & IngredientId.S %in% c(27, 291)))
## [1] 0

In fact, lettuce is only used in recipes for some salads (which don’t have any sub-recipes)

# Distinct values among rows where lettuce enters via the recipe (.R side).
# levels(factor(x)) returns the sorted unique values as character strings;
# sub_ingred comes out empty (see output below) because IngredientId.S is NA
# on all of these rows -- factor() drops NA when building levels.
ingred <- as.list(levels(factor((table5 %>% filter(IngredientId.R %in% c(27, 291)))$IngredientId.R)))
quant <- as.list(levels(factor((table5 %>% filter(IngredientId.R %in% c(27, 291)))$Quantity.R)))
cat <- as.list(levels(factor((table5 %>% filter(IngredientId.R %in% c(27, 291)))$CategoryDescription)))
sub_ingred <- as.list(levels(factor((table5 %>% filter(IngredientId.R %in% c(27, 291)))$IngredientId.S)))

# NOTE(review): the variable `cat` shadows base::cat; the call below still
# works because R searches for a *function* in call position, but renaming the
# variable (e.g. to `categ`) would be safer.
cat(paste(" Ingredient ID: ", ingred, "\n", "Ingredient Quantity:", quant, "\n", 
          "Category:", cat, "\n", "Subrecipe Ingredient:", sub_ingred))
##  Ingredient ID:  27 
##  Ingredient Quantity: 5 
##  Category: Salad            
##  Subrecipe Ingredient:

Therefore, we can replace all NA occurrences in the IngredientId.S and Quantity.S columns with 27 and 5 respectively (so that everything is in the same columns)

# Fill the missing sub-recipe columns with the recipe-level lettuce values
# (ingredient 27, quantity 5) so all lettuce info lives in the .S columns
table5$IngredientId.S <- replace(table5$IngredientId.S, is.na(table5$IngredientId.S), 27)
table5$Quantity.S <- replace(table5$Quantity.S, is.na(table5$Quantity.S), 5)

As we can see from the lettuce table above, Lettuce (id:27) and Lettuce-Metric (id:291) have PortionUOMTypeId’s 15 and 13, which correspond to ounces and grams respectively. In order to calculate all quantities correctly, we first need to convert everything to ounces so that the units are consistent (1 gram ≈ 0.0353 ounces).

# Table6: Adding ounce column that converts metric quantities (id:291) to ounces.
# After the NA fill above, IngredientId.S should only take values 27 (already
# ounces, kept as-is) or 291 (grams, converted).
# NOTE(review): 0.0353 is a rounded grams-to-ounces factor (1 g = 0.035274 oz);
# confirm this precision is acceptable for the demand totals.
table6 <- table5 %>% mutate(Quantity_ounces = ifelse(IngredientId.S == 27, Quantity.S, Quantity.S*0.0353))

We also need to get the total quantity depending on how much of the sub-recipe is required for the total recipe (factor column). In the cases of some salads, where there is no sub-recipe, we first need to replace NAs with 1s so that when we multiply the final quantities we don’t zero out lettuce quantities.

# Replacing NA occurrences in the Factor column with 1 so recipes without a
# sub-recipe are not zeroed out in the multiplication below.
# NOTE(review): the original comment mentioned "0 (and NA)" but only NAs are
# replaced here -- confirm no genuine 0 Factors need the same treatment.
table6$Factor[is.na(table6$Factor)] <- 1

# Table7: Multiply quantity by sub-recipe factor and total menu item quantity
table7 <- table6 %>% mutate(Quantity_total = Quantity_ounces*Factor*Quantity.M)

# Formatting date column since it will be used later for daily grouping.
# NOTE(review): "%y" expects a TWO-digit year; if the raw dates carried
# four-digit years this parse would yield NAs -- verify against the source data.
table7$date <- as.Date(table7$date, format="%y-%m-%d")

This is the finalized table. We now need to start grouping so as to get the aggregate values of lettuce needed per day for each of the four stores. First we create 4 separate dataframes, one for each store:

# One data frame per restaurant, keyed by StoreNumber
store_4904  <- filter(table7, StoreNumber == 4904)
store_12631 <- filter(table7, StoreNumber == 12631)
store_20974 <- filter(table7, StoreNumber == 20974)
store_46673 <- filter(table7, StoreNumber == 46673)

We can then group by MD5KEY_MENUITEM and then by date. When we aggregate by MD5KEY_MENUITEM, we don’t want to sum the quantities, so we can just choose the maximum amount (since they are all the same). We can then group by date again and sum the quantities so as to get the total quantity of lettuce used every day.

# Collapse a store's transaction rows to one row per day: keep only the needed
# columns (positions 1, 9, 10, 32 -- per the original: transaction / menu item
# ids, date and total lettuce quantity), drop duplicate transaction+menu-item
# rows, then sum the lettuce quantity for each date.
daily_lettuce <- function(store_df) {
  store_df %>%
    select(1, 9, 10, 32) %>%
    unique() %>%
    group_by(date) %>%
    summarise(Quantity_sum = sum(Quantity_total))
}

store_4904  <- daily_lettuce(store_4904)
store_12631 <- daily_lettuce(store_12631)
store_20974 <- daily_lettuce(store_20974)
store_46673 <- daily_lettuce(store_46673)

Time Series Preparation

Turning them into time series objects:

# Daily observations with weekly seasonality (frequency = 7); `start` is the
# week index of each store's first observation. Note [,2] keeps a one-column
# (matrix-like) ts, which later code relies on (e.g. train[,1] and nrow()).
store_4904_ts <- ts(store_4904[,2],   frequency = 7, start = 11) #week 11
store_12631_ts <- ts(store_12631[,2], frequency = 7, start = 10) #week 10
store_20974_ts <- ts(store_20974[,2], frequency = 7, start = 10) 
store_46673_ts <- ts(store_46673[,2], frequency = 7, start = 10) 

Plotting time series:

# One demand plot per store, assembled into a 2x2 grid
p1 <- autoplot(store_4904_ts)  + labs(title = 'Store 4904',  x = 'Week', y = 'Lettuce (ounces)')
p2 <- autoplot(store_12631_ts) + labs(title = 'Store 12631', x = 'Week', y = 'Lettuce (ounces)')
p3 <- autoplot(store_20974_ts) + labs(title = 'Store 20974', x = 'Week', y = 'Lettuce (ounces)')
p4 <- autoplot(store_46673_ts) + labs(title = 'Store 46673', x = 'Week', y = 'Lettuce (ounces)')

grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)

The time series plot for store 20974 shows that the first 6 observations could be potential outliers as they are unusually small. They are thus filtered out of the time series in order to obtain more accurate forecasts.

store_20974_ts <- window(store_20974_ts, start=c(10,7))

In the following sections, the time series will be modeled using two different methods, and the performance of their forecasts will be compared. In order to assess model fit and forecast performance, a training and test set will be created for each time series. This will be done using the ts_split() function which splits a time series object into training and test sets by assigning the older observations into the training and the most recent observations into the test set. Since we have been asked to provide a forecast for the next two weeks, the test set has been set to contain 15% of the data so that the number of test days approximately reflects the actual forecasting period. For both forecasting methods, detailed steps will be outlined for store 4904, and then a summary of calculations will be provided for the other 3 stores.

# Hold out ~15% of each series as a test set; ts_split assigns the most
# recent observations to the test set and the rest to training.
hold_out_split <- function(ts_obj) {
  ts_split(ts.obj = ts_obj, round(0.15 * nrow(ts_obj)))
}

store_4904_split <- hold_out_split(store_4904_ts)
store_4904_train <- store_4904_split$train
store_4904_test <- store_4904_split$test

store_12631_split <- hold_out_split(store_12631_ts)
store_12631_train <- store_12631_split$train
store_12631_test <-  store_12631_split$test

store_20974_split <- hold_out_split(store_20974_ts)
store_20974_train <- store_20974_split$train
store_20974_test <-  store_20974_split$test

store_46673_split <- hold_out_split(store_46673_ts)
store_46673_train <- store_46673_split$train
store_46673_test <-  store_46673_split$test

# Checking the number of observations in each set

cat(paste(" store_4904:  Training Set -",length(store_4904_train ), "& Test Set -", length(store_4904_test ), "\n", 
          "store_12631: Training Set -", length(store_12631_train), "& Test Set -", length(store_12631_test), "\n", 
          "store_20974: Training Set -", length(store_20974_train), "& Test Set -", length(store_20974_test), "\n", 
          "store_46673: Training Set -", length(store_46673_train), "& Test Set -", length(store_46673_test)))
##  store_4904:  Training Set - 81 & Test Set - 14 
##  store_12631: Training Set - 88 & Test Set - 15 
##  store_20974: Training Set - 75 & Test Set - 13 
##  store_46673: Training Set - 88 & Test Set - 15

2. Holt-Winters Model

The lettuce demand for the next two weeks will first be forecasted using the Holt-Winters Model. This method allows for trend- and seasonality-corrected exponential smoothing, i.e. it assigns exponentially decreasing weights to observations as they become older, so the most recent data points carry the most weight. This method consists of one forecasting equation and three smoothing equations as shown below:

\[f_{t,h} = (\hat{L_t} + h\hat{T_t})\hat{S}_{t+h} \text{ for any } h>0 \\\] \[\hat{L}_{t+1} = \alpha(X_{t+1}/\hat{S}_{t+1}) + (1-\alpha)(\hat{L_t} + \hat{T_t}) \\\]

\[\hat{T}_{t+1} = \beta(\hat{L}_{t+1}-\hat{L_{t}}) + (1-\beta)\hat{T_t}\\\]

\[\hat{S}_{t+p+1} = \gamma(X_{t+1}/\hat{L}_{t+1}) + (1-\gamma)\hat{S}_{t+1}\\\]

where \(\hat{L_t}\), \(\hat{T_t}\) and \(\hat{S_t}, ...,\hat{S}_{t+p+1}\) are the estimates of the level, trend and seasonal factors respectively for a time period t. In addition, \(\alpha, \beta, \gamma\) are the smoothing parameters for level, trend and seasonal factors respectively, and are all between 0 and 1.

The optimal Holt-Winters model will be applied using the ets function of the forecast package.

Store 4904

The stl function is used to break down the series into a trend, seasonality and error level:

store_4904_train[,1] %>% stl(s.window = "periodic") %>% autoplot + xlab('Week') + ggtitle('Time Series Decomposition for Store 4904')

The gray bars on the right represent the relative importance of each level by taking into account the range of y values that the trend, seasonality and error components reflect. In general, the smaller the gray bar the higher the relative importance. In the plot above, we can see that the bars for both trend and seasonality are quite big, indicating that their contribution to the variation of lettuce demand in the original time series is very small.

A Holt-Winters model will now be fitted using R’s ets function which takes three arguments; one for the error, trend and seasonality type. Since the trend component has the largest bar, it will be set to None. In addition, there is no evidence of exponentially increasing variation in the seasonal and error components. Therefore, additive parameters will be used as opposed to multiplicative. A second model will also be fitted by using the parameters ‘Z’ as arguments. This tells the ets function to estimate all models and return the one with the lowest information criteria. Both the aic and bic criteria will be used, and the model that predicts the best will be used.

# Fitting models: hand-picked ANA (additive error, no trend, additive
# seasonality) plus two automatic fits ("ZZZ" = search all component types)
# selected under AIC and BIC respectively
store_4904_ets <- ets(store_4904_train, model = "ANA")
store_4904_ets_testa <- ets(store_4904_train, model = "ZZZ", ic="aic")
store_4904_ets_testb <- ets(store_4904_train, model = "ZZZ", ic="bic")

# Checking what model is recommended by the 'ZZZ' approach
# (components[1:3] = error, trend, seasonality types of the chosen model)
store_4904_ets_testa$components[1:3]
## [1] "A" "N" "A"
store_4904_ets_testb$components[1:3]
## [1] "A" "N" "A"

Therefore, the automatic selection approach confirms our allocation of parameters and we can thus proceed with the ‘ANA’ model.

Forecasting the next 14 days and obtaining in-sample (training set fit) and out of sample (test set fit) accuracy:

kable(accuracy(forecast(store_4904_ets, h = 14) , store_4904_test))
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -0.9194867 43.04688 32.02198 -2.351651 10.999318 0.6799502 -0.0667622 NA
Test set -9.7799936 37.75467 28.39140 -3.682129 9.266691 0.6028589 0.1306179 0.4259769

In general, we expect test errors to be slightly higher than training errors. In this case, however, we observe slightly lower values for some error metrics, such as the Root Mean Square Error (RMSE) and the Mean Absolute Error (MAE). This could be due to the fact that our data set is small and the test set is much smaller than the training set. Since we have trained our model on 85% of the data, it seems to be generalising well on the test set.

We can perform the following residual checks to ensure that the model fits the data well. The following residual plot shows that despite a few discrepancies, the residuals have a zero-mean and constant variance.

autoplot(store_4904_ets$residuals) + ggtitle('ETS plot Residuals') + xlab('Week') + ylab('Residuals')

In addition, by looking at the ACF plot of the residuals we can see that they do not present any significant spikes.

ggAcf(store_4904_ets$residuals) + ggtitle('ACF plot for ETS Model Residuals')

Finally, the Ljung-Box test statistic is used to determine whether the residuals are identically and independently distributed. In other words, it confirms whether the residuals form white noise. For this test statistic, the null hypothesis states that there is no lack of fit and thus the residuals are iid. The alternative hypothesis states that the model does show lack of fit.

Box.test(store_4904_ets$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  store_4904_ets$residuals
## X-squared = 0.37457, df = 1, p-value = 0.5405

Since our p-value is larger than the critical value of 0.05, we don’t have enough evidence to reject the null hypothesis and conclude that our model fits the data well and the residuals are iid.

Now that we have confirmed that the residuals meet the essential statistical assumptions, the model is re-trained on the entire data set to make an out-of-sample forecast for the next 14 days:

# Residual checks passed: re-fit the ANA model on the full series and produce
# the final 14-day forecast
store_4904_ets_final <- ets(store_4904_ts, model = "ANA")
store_4904_ets_fc <- forecast(store_4904_ets_final, h = 14)
kable(store_4904_ets_fc)
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
24.57143 359.2945 302.4655 416.1235 272.3820 446.2069
24.71429 347.8354 290.1187 405.5521 259.5654 436.1055
24.85714 308.8660 250.2751 367.4569 219.2589 398.4731
25.00000 212.2648 152.8125 271.7171 121.3403 303.1892
25.14286 212.9511 152.6498 273.2525 120.7282 305.1741
25.28571 336.5967 275.4580 397.7353 243.0932 430.1002
25.42857 336.8785 274.9129 398.8440 242.1103 431.6466
25.57143 359.2945 296.5138 422.0751 263.2798 455.3092
25.71429 347.8354 284.2501 411.4207 250.5901 445.0807
25.85714 308.8660 244.4861 373.2459 210.4055 407.3265
26.00000 212.2648 147.1000 277.4295 112.6038 311.9257
26.14286 212.9511 147.0108 278.8915 112.1041 313.7982
26.28571 336.5967 269.8898 403.3035 234.5773 438.6160
26.42857 336.8785 269.4129 404.3440 233.6988 440.0581

The below time series plot shows the 14-day forecast. The shaded area represents the 80% and 95% prediction interval.

plot(store_4904_ets_fc)


Store 12631

The stl function is used to break down the series into a trend, seasonal and error level:

store_12631_train[,1] %>% stl(s.window = "periodic") %>% autoplot + xlab('Week') + ggtitle('Time Series Decomposition for Store 12631')

In the plot above, we can see that the bar for the seasonality factors is very large, indicating that its contribution to the variation of lettuce demand in the original time series is very small. Trend has a smaller bar, showing that it could be an important component of the time series. The remainder of the time series, shown as the last plot above, seems to have a multiplicative nature as the peak bars seem to be increasing as the trend goes up. Therefore, the model that will be initially fitted is a “MAN” model.

# Fitting models: hand-picked MAN (multiplicative error, additive trend, no
# seasonality) plus automatic AIC- and BIC-selected fits
store_12631_ets <- ets(store_12631_train, model = "MAN")
store_12631_ets_testa <- ets(store_12631_train, model = "ZZZ", ic="aic")
store_12631_ets_testb <- ets(store_12631_train, model = "ZZZ", ic="bic")

# Checking what model is recommended by the 'ZZZ' approach
# (AIC and BIC disagree here; the candidates are compared on test error below)
store_12631_ets_testa$components[1:3]
## [1] "M" "A" "M"
store_12631_ets_testb$components[1:3]
## [1] "M" "N" "A"

With both AIC and BIC, the automatic selection suggests that seasonality is important despite the large relative importance bar. In order to pick the best model, we will examine which of them predicts the best:

kable(accuracy(forecast(store_12631_ets, h = 15) , store_12631_test), caption = 'Training and Test Errors for "MAN" model')
Training and Test Errors for “MAN” model
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -0.2724255 42.44260 34.22707 -2.57042 13.24241 0.9407509 0.0969064 NA
Test set -46.4450438 66.47585 62.26579 -21.06293 25.32277 1.7114113 0.1063764 1.026794
kable(accuracy(forecast(store_12631_ets_testa, h = 15) , store_12631_test), caption = 'Training and Test Errors for "MAM" model')
Training and Test Errors for “MAM” model
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -0.6889518 33.75130 25.75517 -1.934287 9.914962 0.7078957 0.0429375 NA
Test set -25.4668802 47.20014 40.72668 -12.024633 16.310291 1.1193964 -0.0025089 0.7363123
kable(accuracy(forecast(store_12631_ets_testb, h = 15) , store_12631_test), caption = 'Training and Test Errors for "MNA" model')
Training and Test Errors for “MNA” model
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set 4.068353 35.90313 27.44120 -0.1419881 10.40678 0.7542373 0.0004899 NA
Test set -20.188511 45.53447 39.35159 -10.1282790 15.50693 1.0816011 0.0272487 0.7056087

The “MNA” model has much lower test errors, suggesting that it forecasts the data better than the other two models.

The typical residual checks are now performed.

autoplot(store_12631_ets_testb$residuals) + ggtitle('ETS plot Residuals') + xlab('Week') + ylab('Residuals')

ggAcf(store_12631_ets_testb$residuals) + ggtitle('ACF plot for ETS Model Residuals')

Box.test(store_12631_ets_testb$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  store_12631_ets_testb$residuals
## X-squared = 0.033893, df = 1, p-value = 0.8539

The residuals seem to have a zero-mean and constant variance, all ACF values are non-significant and since our p-value is larger than the critical value of 0.05, we don’t have enough evidence to reject the null hypothesis and conclude that our model fits the data well and the residuals are iid.

Therefore, the “MNA” model will be used to re-train the entire data and forecast the next 14 days.

# "MNA" had the lowest test errors; re-fit it on the full series for the
# final 14-day forecast
store_12631_ets_final <- ets(store_12631_ts, model = "MNA")
store_12631_ets_fc <- forecast(store_12631_ets_final, h = 14)

The below time series plot shows the 14-day forecast. The shaded area represents the 80% and 95% prediction interval.

plot(store_12631_ets_fc)


Store 20974

The stl function is used to break down the series into a trend, seasonal and error level:

store_20974_train[,1] %>% stl(s.window = "periodic") %>% autoplot + xlab('Week') + ggtitle('Time Series Decomposition for Store 20974')

In the plot above, we can see that the bar for the trend factors is very large, indicating that its contribution to the variation of lettuce demand in the original time series is very small to almost non existent. The seasonality factors have a slightly shorter bar suggesting that they could be significant in the model. In addition, we can see that both the seasonality and the remainder components seem to be additive as opposed to multiplicative (as the variance does not increase in a multiplicative manner). Therefore, the model that will be initially fitted is a “ANA” model.

# Fitting models: hand-picked ANA (additive error, no trend, additive
# seasonality) plus automatic AIC- and BIC-selected fits
store_20974_ets <- ets(store_20974_train, model = "ANA")
store_20974_ets_testa <- ets(store_20974_train, model = "ZZZ", ic="aic")
store_20974_ets_testb <- ets(store_20974_train, model = "ZZZ", ic="bic")

# Checking what model is recommended by the 'ZZZ' approach
store_20974_ets_testa$components[1:3]
## [1] "A" "N" "A"
store_20974_ets_testb$components[1:3]
## [1] "A" "N" "A"

The automatic selection confirms that the “ANA” model has the best fit.

kable(accuracy(forecast(store_20974_ets, h = 13) , store_20974_test), caption = 'Training and Test Errors for "ANA" model')
Training and Test Errors for “ANA” model
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -1.024937 40.4981 33.41008 -13.064614 25.69316 0.7151041 0.0868321 NA
Test set 25.205681 56.8624 34.53942 8.304245 13.70945 0.7392763 -0.3209396 0.831531

The typical residual checks are now performed.

autoplot(store_20974_ets$residuals) + ggtitle('ETS plot Residuals') + xlab('Week') + ylab('Residuals')

ggAcf(store_20974_ets$residuals) + ggtitle('ACF plot for ETS Model Residuals')

Box.test(store_20974_ets$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  store_20974_ets$residuals
## X-squared = 0.58841, df = 1, p-value = 0.443

The residuals seem to have a zero-mean and constant variance, all ACF values are non-significant and since our p-value is larger than the critical value of 0.05, we don’t have enough evidence to reject the null hypothesis and conclude that our model fits the data well and the residuals are iid.

Therefore, the “ANA” model will be used to re-train on the entire data set and forecast the next 14 days.

# Re-fit the ANA model on the full series for the final 14-day forecast
store_20974_ets_final <- ets(store_20974_ts, model = "ANA")
store_20974_ets_fc <- forecast(store_20974_ets_final, h = 14)

The below time series plot shows the 14-day forecast. The shaded area represents the 80% and 95% prediction interval.

plot(store_20974_ets_fc)


Store 46673

The stl function is used to break down the series into a trend, seasonal and error level:

store_46673_train[,1] %>% stl(s.window = "periodic") %>% autoplot + xlab('Week') + ggtitle('Time Series Decomposition for Store 46673')

In the plot above, we can see that the bar for the trend component is very large, indicating that its contribution to the variation of lettuce demand in the original time series is very small to almost non existent. The seasonal factors have a much smaller relative importance bar which means that seasonality is an important component of the time series. In addition, we can see that both the seasonality and the remainder components seem to be additive as opposed to multiplicative (as the variance does not increase in a multiplicative manner). Therefore, the model that will be initially fitted is a “ANA” model. This will be compared to the automatically selected model using “ZZZ”.

# Fitting models: hand-picked ANA (additive error, no trend, additive
# seasonality) plus automatic AIC- and BIC-selected fits
store_46673_ets <- ets(store_46673_train, model = "ANA")
store_46673_ets_testa <- ets(store_46673_train, model = "ZZZ", ic="aic")
store_46673_ets_testb <- ets(store_46673_train, model = "ZZZ", ic="bic")

# Checking what model is recommended by the 'ZZZ' approach
store_46673_ets_testa$components[1:3]
## [1] "A" "N" "A"
store_46673_ets_testb$components[1:3]
## [1] "A" "N" "A"

The automatic selection confirms that “ANA” is a better model. Therefore, we proceed by checking how well it forecasts the data:

kable(accuracy(forecast(store_46673_ets, h = 15) , store_46673_test), caption = 'Training and Test Errors for "ANA" model')
Training and Test Errors for “ANA” model
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -1.100689 23.02907 18.44768 -3.895680 14.36341 0.7055063 0.0856638 NA
Test set 15.820392 38.73735 28.60888 8.421665 18.29080 1.0941074 0.0573268 0.6199927

The typical residual checks are now performed.

autoplot(store_46673_ets$residuals) + ggtitle('ETS plot Residuals') + xlab('Week') + ylab('Residuals')

ggAcf(store_46673_ets$residuals) + ggtitle('ACF plot for ETS Model Residuals')

Box.test(store_46673_ets$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  store_46673_ets$residuals
## X-squared = 0.66804, df = 1, p-value = 0.4137

The residuals seem to have a zero-mean and constant variance, the majority of ACF values are non-significant and since our p-value is larger than the critical value of 0.05, we don’t have enough evidence to reject the null hypothesis and conclude that our model fits the data well and the residuals are iid.

Re-training the model on the entire dataset:

# Re-fit the ANA model on the full series for the final 14-day forecast
store_46673_ets_final <- ets(store_46673_ts, model = "ANA")
store_46673_ets_fc <- forecast(store_46673_ets_final, h = 14)

The below time series plot shows the 14-day forecast. The shaded area represents the 80% and 95% prediction interval.

plot(store_46673_ets_fc)


3. ARIMA Model

The first step in a time series analysis is to determine whether the time series in question is stationary, i.e. whether its statistical properties (such as the mean and variance) remain constant over time:

Store 4904

We first look at the time series plot to see if there is a clear trend:

autoplot(store_4904_train) + ggtitle('Daily Lettuce Demand for Store 4904', subtitle = 'Training Set') + xlab('Week') + ylab('Lettuce Quantity (ounces)')

As shown above, the series for store 4904 appears to fluctuate around a roughly constant level, with no clear upward or downward trend. This can also be confirmed by calling R’s ndiffs() function, which gives the number of differences required in order to stationarize a time series.

ndiffs(store_4904_train)
## [1] 0

Since there are 0 differences required, we get confirmation that the series is stationary in terms of trend. However, when looking at the time series plot above, there seems to be a seasonality in the data. This can be confirmed using the nsdiffs function, which calculates how many seasonal differences are required in order to stationarize the series.

nsdiffs(store_4904_train)
## [1] 1

Since we get a value of 1, we need to seasonally difference the series one time. Since the seasonal pattern seems to arise on a weekly basis, a lag of 7 is used when differencing. This means that the value observed 7 time periods ago (in this case, 7 days) is subtracted from the current value.

# Remove the weekly pattern with a single lag-7 seasonal difference
# (differences = 1 is the default for diff())
store_4904_train_sdiff <- diff(store_4904_train, lag = 7)
autoplot(store_4904_train_sdiff) +
  ggtitle('Store 4904 - Seasonally Differenced Time Series') +
  xlab('Week') + ylab('Differenced Lettuce Quantity')

Checking that no further differencing is required:

ndiffs(store_4904_train_sdiff)
## [1] 0
nsdiffs(store_4904_train_sdiff)
## [1] 0

We can now ensure that the time series is stationary using the following stationarity tests.

adf.test(store_4904_train_sdiff) 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  store_4904_train_sdiff
## Dickey-Fuller = -0.98038, Lag order = 4, p-value = 0.9349
## alternative hypothesis: stationary

The adf (Augmented Dickey Fuller) test is used to determine whether there is a unit root in the series. The null hypothesis is that the series has a unit root (ie. might be non-stationary) and the alternative is that the series has no unit root and is thus stationary. In the above output we get a p-value of 0.9 which is larger than the critical value of 0.05. Therefore we do not have enough evidence to reject the null hypothesis, which could mean that the series is not stationary.

pp.test(store_4904_train_sdiff)
## 
##  Phillips-Perron Unit Root Test
## 
## data:  store_4904_train_sdiff
## Dickey-Fuller Z(alpha) = -75.208, Truncation lag parameter = 3, p-value
## = 0.01
## alternative hypothesis: stationary

The pp (Phillips-Perron) test is very similar to the adf test but allows for autocorrelated residuals. It has the same null and alternative hypotheses as the adf test. For this test we get a p-value of 0.01, thus rejecting the null hypothesis. The first two tests therefore present contradictory results. In such a small data set it can often be the case that these tests give different results. In addition, both the adf and pp tests are known to often fail to reject the null. In order to resolve this issue, a stationarity test is used.

kpss.test(store_4904_train_sdiff) 
## 
##  KPSS Test for Level Stationarity
## 
## data:  store_4904_train_sdiff
## KPSS Level = 0.15501, Truncation lag parameter = 3, p-value = 0.1

The KPSS test is a stationarity test with a null and alternative hypothesis opposite to the adf and pp test ie. the null says that the series is stationary. In the above output we get a p value of 0.1 which is higher than 0.05. Therefore, we do not have enough evidence to reject the null and conclude that the series is stationary.

Now that we have confirmed that the time series is stationary, we can start building the ARIMA model. An effective way of determining the orders of the AR and MA components is the identification stage of the Box-Jenkins procedure, which states that the selection of ARIMA parameters is based on the ACF (Autocorrelation Function) and PACF (Partial Autocorrelation Function) plots of the time series. These are all based on the training sets that were created above.

# ACF (top) and PACF (bottom) of the seasonally differenced series, out to
# lag 40 so three full weekly cycles are visible
grid.arrange(ggAcf(store_4904_train_sdiff , lag.max = 40, main = ''), 
                  ggPacf(store_4904_train_sdiff, lag.max = 40, main = ''), 
                  ncol = 1, nrow = 2, 
                  top = 'STORE 4904') 

Using the plots above, and taking into account that, despite some discrepancies, the ACF and PACF values that lie between \(\pm 1.96 / \sqrt{n}\) are negligible, we make the following observations. When an ACF plot is significant at lag m and presents a geometric decay at each m lag in the PACF plot, then we need to add a seasonal MA component. The ACF plot has no significant correlations except for a peak at lag 7. This means that there is a strong correlation between now and 1 week ago in terms of lettuce demand. In other words, there is a strong weekly seasonality present in the time series. Since we don’t observe any other important peaks (eg. in lag 14 or 21), we conclude that the seasonal MA component of the ARIMA model should be 1. This can also be confirmed from the fact that the PACF plot shows exponential decay at lag 7 (and a faint exponential decay at every multiple of lag 7).

The auto.arima() function will now be used, which chooses optimal values for p and q based on information criteria. By enabling the trace argument, we are telling the function to take a look at all of the models evaluated along the way. The recommended models differ significantly when the information criteria is set to AIC and BIC. Therefore, the two models with the lowest AIC and two models with lowest BIC will be selected and compared to see which one forecasts the best.

auto.arima(store_4904_train, trace = TRUE, ic = 'aic')
## 
##  ARIMA(2,0,2)(1,1,1)[7] with drift         : Inf
##  ARIMA(0,0,0)(0,1,0)[7] with drift         : 823.5309
##  ARIMA(1,0,0)(1,1,0)[7] with drift         : 815.5525
##  ARIMA(0,0,1)(0,1,1)[7] with drift         : Inf
##  ARIMA(0,0,0)(0,1,0)[7]                    : 821.5322
##  ARIMA(1,0,0)(0,1,0)[7] with drift         : 825.1209
##  ARIMA(1,0,0)(2,1,0)[7] with drift         : 810.3328
##  ARIMA(1,0,0)(2,1,1)[7] with drift         : Inf
##  ARIMA(1,0,0)(1,1,1)[7] with drift         : Inf
##  ARIMA(0,0,0)(2,1,0)[7] with drift         : 809.4067
##  ARIMA(0,0,0)(1,1,0)[7] with drift         : 813.6334
##  ARIMA(0,0,0)(2,1,1)[7] with drift         : Inf
##  ARIMA(0,0,0)(1,1,1)[7] with drift         : Inf
##  ARIMA(0,0,1)(2,1,0)[7] with drift         : 810.7236
##  ARIMA(1,0,1)(2,1,0)[7] with drift         : 806.4279
##  ARIMA(1,0,1)(1,1,0)[7] with drift         : 814.7813
##  ARIMA(1,0,1)(2,1,1)[7] with drift         : Inf
##  ARIMA(1,0,1)(1,1,1)[7] with drift         : Inf
##  ARIMA(2,0,1)(2,1,0)[7] with drift         : 806.0656
##  ARIMA(2,0,1)(1,1,0)[7] with drift         : 813.2674
##  ARIMA(2,0,1)(2,1,1)[7] with drift         : Inf
##  ARIMA(2,0,1)(1,1,1)[7] with drift         : Inf
##  ARIMA(2,0,0)(2,1,0)[7] with drift         : 806.2977
##  ARIMA(3,0,1)(2,1,0)[7] with drift         : Inf
##  ARIMA(2,0,2)(2,1,0)[7] with drift         : 807.1963
##  ARIMA(1,0,2)(2,1,0)[7] with drift         : 805.7785
##  ARIMA(1,0,2)(1,1,0)[7] with drift         : 814.5506
##  ARIMA(1,0,2)(2,1,1)[7] with drift         : Inf
##  ARIMA(1,0,2)(1,1,1)[7] with drift         : Inf
##  ARIMA(0,0,2)(2,1,0)[7] with drift         : 808.5373
##  ARIMA(1,0,3)(2,1,0)[7] with drift         : Inf
##  ARIMA(0,0,3)(2,1,0)[7] with drift         : 810.4687
##  ARIMA(2,0,3)(2,1,0)[7] with drift         : 809.0441
##  ARIMA(1,0,2)(2,1,0)[7]                    : 803.7958
##  ARIMA(1,0,2)(1,1,0)[7]                    : 812.6489
##  ARIMA(1,0,2)(2,1,1)[7]                    : Inf
##  ARIMA(1,0,2)(1,1,1)[7]                    : Inf
##  ARIMA(0,0,2)(2,1,0)[7]                    : 806.7005
##  ARIMA(1,0,1)(2,1,0)[7]                    : 804.4291
##  ARIMA(2,0,2)(2,1,0)[7]                    : 805.2078
##  ARIMA(1,0,3)(2,1,0)[7]                    : Inf
##  ARIMA(0,0,1)(2,1,0)[7]                    : 809.1625
##  ARIMA(0,0,3)(2,1,0)[7]                    : 808.6235
##  ARIMA(2,0,1)(2,1,0)[7]                    : 804.0774
##  ARIMA(2,0,3)(2,1,0)[7]                    : 807.0605
## 
##  Best model: ARIMA(1,0,2)(2,1,0)[7]
## Series: store_4904_train 
## ARIMA(1,0,2)(2,1,0)[7] 
## 
## Coefficients:
##          ar1      ma1     ma2     sar1     sar2
##       0.8641  -0.8480  0.2176  -0.6466  -0.4266
## s.e.  0.0977   0.1637  0.1351   0.1397   0.1264
## 
## sigma^2 = 2626:  log likelihood = -395.9
## AIC=803.8   AICc=805.05   BIC=817.62

Thus, the models with the lowest AIC are:

  • Model 1a: ARIMA(1,0,2)(2,1,0)[7] (AIC = 803.8)
  • Model 2a: ARIMA(2,0,1)(2,1,0)[7] (AIC = 804.1)
auto.arima(store_4904_train, trace = TRUE, ic = 'bic', d=0, D=1)
## 
##  ARIMA(2,0,2)(1,1,1)[7] with drift         : Inf
##  ARIMA(0,0,0)(0,1,0)[7] with drift         : 828.139
##  ARIMA(1,0,0)(1,1,0)[7] with drift         : 824.7688
##  ARIMA(0,0,1)(0,1,1)[7] with drift         : Inf
##  ARIMA(0,0,0)(0,1,0)[7]                    : 823.8362
##  ARIMA(0,0,0)(1,1,0)[7] with drift         : 820.5456
##  ARIMA(0,0,0)(2,1,0)[7] with drift         : 818.6229
##  ARIMA(0,0,0)(2,1,1)[7] with drift         : Inf
##  ARIMA(0,0,0)(1,1,1)[7] with drift         : Inf
##  ARIMA(1,0,0)(2,1,0)[7] with drift         : 821.8531
##  ARIMA(0,0,1)(2,1,0)[7] with drift         : 822.244
##  ARIMA(1,0,1)(2,1,0)[7] with drift         : 820.2523
##  ARIMA(0,0,0)(2,1,0)[7]                    : 814.8431
##  ARIMA(0,0,0)(1,1,0)[7]                    : 816.4742
##  ARIMA(0,0,0)(2,1,1)[7]                    : Inf
##  ARIMA(0,0,0)(1,1,1)[7]                    : Inf
##  ARIMA(1,0,0)(2,1,0)[7]                    : 817.9219
##  ARIMA(0,0,1)(2,1,0)[7]                    : 818.3788
##  ARIMA(1,0,1)(2,1,0)[7]                    : 815.9494
## 
##  Best model: ARIMA(0,0,0)(2,1,0)[7]
## Series: store_4904_train 
## ARIMA(0,0,0)(2,1,0)[7] 
## 
## Coefficients:
##          sar1     sar2
##       -0.5576  -0.3439
## s.e.   0.1296   0.1341
## 
## sigma^2 = 2936:  log likelihood = -400.97
## AIC=807.93   AICc=808.27   BIC=814.84

Thus the models with lowest BIC are:

  • Model 1b: ARIMA(0,0,0)(2,1,0)[7] (BIC = 814.8)
  • Model 2b: ARIMA(1,0,1)(2,1,0)[7] (BIC = 815.9)

Both methods seem to suggest that in fact we should include a seasonal AR component instead of an MA component.

# fitting four candidate models 
# Candidates come from the two auto.arima() runs above:
#   1a/2a = lowest-AIC models, 1b/2b = lowest-BIC models.
# All share the seasonal (2,1,0)[7] part; drift is excluded in every fit
# so the models match the non-drift specifications selected above.

store_4904_arima_1a <- Arima(store_4904_train, order = c(1, 0, 2), 
                       seasonal = list(order = c(2, 1, 0), period = 7), include.drift = FALSE)
store_4904_arima_2a <- Arima(store_4904_train, order = c(2, 0, 1), 
                       seasonal = list(order = c(2, 1, 0), period = 7), include.drift = FALSE)
store_4904_arima_1b <- Arima(store_4904_train, order = c(0, 0, 0), 
                       seasonal = list(order = c(2, 1, 0), period = 7), include.drift = FALSE)
store_4904_arima_2b <- Arima(store_4904_train, order = c(1, 0, 1), 
                       seasonal = list(order = c(2, 1, 0), period = 7), include.drift = FALSE)

We can first investigate how well they fit and forecast the data by looking at the training and test set errors.

kable(accuracy(forecast(store_4904_arima_1a, h = 14), store_4904_test), caption='Training and Test Errors for ARIMA(1,0,2)(2,1,0)[7]')
Training and Test Errors for ARIMA(1,0,2)(2,1,0)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set 0.0617485 47.29793 34.83913 -1.266520 11.56128 0.7397691 0.0154973 NA
Test set -12.7321766 43.87772 35.32210 -5.882579 11.98228 0.7500244 -0.2895055 0.5625631
kable(accuracy(forecast(store_4904_arima_2a, h = 14), store_4904_test), caption='Training and Test Errors for ARIMA(2,0,1)(2,1,0)[7]')
Training and Test Errors for ARIMA(2,0,1)(2,1,0)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -0.0327374 47.43893 34.90898 -1.363304 11.61921 0.7412524 -0.0022362 NA
Test set -11.0303466 42.82571 34.92296 -5.249006 11.80578 0.7415492 -0.2831604 0.5519051
kable(accuracy(forecast(store_4904_arima_1b, h = 14), store_4904_test), caption='Training and Test Errors for ARIMA(0,0,0)(2,1,0)[7]')
Training and Test Errors for ARIMA(0,0,0)(2,1,0)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -2.765217 51.08438 38.18109 -2.75980 12.952629 0.8107318 0.1189138 NA
Test set 3.993632 35.73311 30.16142 -0.03241 9.986075 0.6404433 -0.3477396 0.4763777
kable(accuracy(forecast(store_4904_arima_2b, h = 14), store_4904_test), caption='Training and Test Errors for ARIMA(1,0,1)(2,1,0)[7]')
Training and Test Errors for ARIMA(1,0,1)(2,1,0)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -0.5390384 48.12504 35.12393 -1.657746 11.85528 0.7458167 -0.1212624 NA
Test set -2.8671278 41.37553 33.59529 -2.535405 11.37570 0.7133577 -0.3400886 0.5575332

Even though the ARIMA(0,0,0)(2,1,0)[7] model is not the best fit to the training set, it is slightly better at forecasting the data than the other models. The interpretation of the ACF and PACF plots above, however, suggested that we should include a seasonal MA component. We will therefore try to improve the above model further by setting the seasonal MA component to 1 and the AR to 0.

# Hand-specified ARIMA(0,0,0)(0,1,1)[7]: replaces the seasonal AR(2) chosen
# by auto.arima with the seasonal MA(1) suggested by the ACF/PACF plots.
store_4904_arima_1bb <- Arima(store_4904_train, order = c(0, 0, 0), 
                       seasonal = list(order = c(0, 1, 1), period = 7))
kable(accuracy(forecast(store_4904_arima_1bb, h = 14), store_4904_test), caption='Training and Test Errors for ARIMA(0,0,0)(0,1,1)[7]')
Training and Test Errors for ARIMA(0,0,0)(0,1,1)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -2.806069 49.98830 36.22054 -2.768926 12.340051 0.7691017 0.1673837 NA
Test set -10.411252 34.99824 27.70366 -4.456707 9.284251 0.5882556 -0.0425864 0.4101133

The above model indeed performs better both on the training and test data set. This is an example of a case where stepwise selection based on information criteria does not always result in the most appropriate model.

We will therefore focus on the ARIMA(0,0,0)(0,1,1)[7] model. This model does not contain any non-seasonal autoregressive or moving average terms since the values for p and q are set to 0. The d parameter is set to zero since no trends were observed in the data and thus no trend differencing was required.

The following residual checks are now performed to ensure they have mean 0 and a constant variance.

autoplot(store_4904_arima_1bb$residuals) + ggtitle('ARIMA(0,0,0)(0,1,1)[7] Residuals') + xlab('Week') + ylab('Residuals')

The plot above shows that the residuals vaguely have a zero mean and constant variance. As this is a very small dataset, we cannot expect the variance to be completely constant across all residuals.

Another residual check is to look at their ACF plot. Ideally, we want the residual ACF to be non-significant ie. to not show any significant spikes. Except for two discrepancies, we can see that all ACF values are within the critical value range.

# Residual ACF for the chosen ARIMA(0,0,0)(0,1,1)[7] model.
# FIX: the original plotted store_4904_arima_1b (the seasonal-AR model);
# the selected model discussed here is store_4904_arima_1bb.
ggAcf(store_4904_arima_1bb$residuals) + ggtitle('ACF plot for ARIMA(0,0,0)(0,1,1)[7] Residuals')

As with the Holt-Winters method, we also use the Ljung-Box test to ensure that the residuals are identically and independently distributed.

# Ljung-Box test on the residuals of the SELECTED model (store_4904_arima_1bb).
# FIX: the original tested store_4904_arima_1b by mistake.
# NOTE(review): the printed output below was generated from the old object;
# re-knit the document to refresh it.
Box.test(store_4904_arima_1bb$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  store_4904_arima_1b$residuals
## X-squared = 1.1883, df = 1, p-value = 0.2757

Since our p-value is larger than the critical value of 0.05, we don’t have enough evidence to reject the null hypothesis and conclude that our model fits the data well and the residuals are iid.

Therefore, we can now re-train the model using the entire data set and forecast the demand of lettuce for the next two weeks as shown below:

# Re-fit the chosen ARIMA(0,0,0)(0,1,1)[7] on the full series and
# forecast lettuce demand (ounces) for the next 14 days.
store_4904_arima_final <- Arima(store_4904_ts, order = c(0, 0, 0), 
                       seasonal = list(order = c(0, 1, 1), period = 7))
store_4904_arima_fc <- forecast(store_4904_arima_final, h = 14)
kable(store_4904_arima_fc )
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
24.57143 360.4455 295.9835 424.9076 261.8593 459.0318
24.71429 328.7233 264.2612 393.1854 230.1370 427.3095
24.85714 296.3099 231.8478 360.7720 197.7237 394.8961
25.00000 232.0672 167.6054 296.5291 133.4813 330.6531
25.14286 231.4814 167.0196 295.9433 132.8955 330.0673
25.28571 362.6138 298.1519 427.0757 264.0279 461.1997
25.42857 336.8297 272.3678 401.2915 238.2438 435.4156
25.57143 360.4455 292.3837 428.5074 256.3539 464.5372
25.71429 328.7233 260.6614 396.7852 224.6316 432.8149
25.85714 296.3099 228.2480 364.3718 192.2182 400.4016
26.00000 232.0672 164.0056 300.1289 127.9759 336.1586
26.14286 231.4814 163.4198 299.5431 127.3901 335.5728
26.28571 362.6138 294.5521 430.6755 258.5225 466.7051
26.42857 336.8297 268.7680 404.8914 232.7383 440.9210

The below time series plot shows the 14-day forecast. The shaded area represents the 80% and 95% prediction interval.

plot(store_4904_arima_fc)


Store 12631

We first look at the time series plot to see if there is a clear trend:

autoplot(store_12631_train) + ggtitle('Daily Lettuce Demand for Store 12631', subtitle = 'Training Set') + xlab('Week') + ylab('Lettuce Quantity (ounces)')

As we can see, there is a slight increasing trend across the time series for store 12631. This can also be confirmed by calling R’s ndiffs() function, which gives the number of differences required in order to stationarize a time series.

ndiffs(store_12631_train)
## [1] 1

Since there is 1 difference required, we get confirmation that the series is not stationary in terms of trend. Checking the nsdiffs function and also the plot above, confirms that there are not any strong seasonality patterns:

nsdiffs(store_12631_train)
## [1] 0

Therefore, we only take a first order difference of the series:

# First-order (trend) difference, as indicated by ndiffs() = 1 above.
# FIX: the original plot title said 'Seasonally Differenced', but
# diff(..., differences = 1) is a regular trend difference, not a seasonal one.
store_12631_train_diff <- diff(store_12631_train, differences=1)
autoplot(store_12631_train_diff) + ggtitle('Store 12631 - First-Order Differenced Time Series') + xlab('Week') + ylab('Differenced Lettuce Quantity')

Looking at the new plot, it is clear that the time series is now fully de-trended. We can now ensure that the time series is stationary using the following stationarity tests.

adf.test(store_12631_train_diff) 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  store_12631_train_diff
## Dickey-Fuller = -7.2885, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
pp.test(store_12631_train_diff)
## 
##  Phillips-Perron Unit Root Test
## 
## data:  store_12631_train_diff
## Dickey-Fuller Z(alpha) = -110.16, Truncation lag parameter = 3, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(store_12631_train_diff) 
## 
##  KPSS Test for Level Stationarity
## 
## data:  store_12631_train_diff
## KPSS Level = 0.032552, Truncation lag parameter = 3, p-value = 0.1

In both the adf and pp test we reject the null hypothesis and conclude that the time series does not have unit roots and is thus stationary. In addition, we fail to reject the null hypothesis in the kpss test which is further confirmation that the series is stationary.

Now that the time series is stationary, we can start building the ARIMA MODEL by first looking at the ACF and PACF plots.

# ACF and PACF of the differenced series (40 lags), stacked vertically,
# used to pick candidate AR/MA orders for the ARIMA model.
grid.arrange(ggAcf(store_12631_train_diff , lag.max = 40, main = ''), 
                  ggPacf(store_12631_train_diff , lag.max = 40, main = ''), 
                  ncol = 1, nrow = 2, 
                  top = 'STORE 12631') 

In the ACF plot above we can see that there are spikes on lag 1, 7, 14, 21 and 28 which then abruptly cut off. In addition, the PACF function has a spike at lag 1 which then geometrically decays. We should therefore add an MA term with maximum order of 1.

The auto.arima function will now be used to obtain and compare four candidate models.

auto.arima(store_12631_train, trace = TRUE, ic = 'aic', d=1)
## 
##  ARIMA(2,1,2)(1,0,1)[7] with drift         : Inf
##  ARIMA(0,1,0)           with drift         : 954.1164
##  ARIMA(1,1,0)(1,0,0)[7] with drift         : 921.2819
##  ARIMA(0,1,1)(0,0,1)[7] with drift         : Inf
##  ARIMA(0,1,0)                              : 952.1165
##  ARIMA(1,1,0)           with drift         : 935.9015
##  ARIMA(1,1,0)(2,0,0)[7] with drift         : 922.4146
##  ARIMA(1,1,0)(1,0,1)[7] with drift         : Inf
##  ARIMA(1,1,0)(0,0,1)[7] with drift         : 924.7999
##  ARIMA(1,1,0)(2,0,1)[7] with drift         : Inf
##  ARIMA(0,1,0)(1,0,0)[7] with drift         : 947.006
##  ARIMA(2,1,0)(1,0,0)[7] with drift         : 918.7401
##  ARIMA(2,1,0)           with drift         : 933.1529
##  ARIMA(2,1,0)(2,0,0)[7] with drift         : 918.8837
##  ARIMA(2,1,0)(1,0,1)[7] with drift         : Inf
##  ARIMA(2,1,0)(0,0,1)[7] with drift         : 923.2471
##  ARIMA(2,1,0)(2,0,1)[7] with drift         : Inf
##  ARIMA(3,1,0)(1,0,0)[7] with drift         : 917.0977
##  ARIMA(3,1,0)           with drift         : 930.6672
##  ARIMA(3,1,0)(2,0,0)[7] with drift         : 916.2351
##  ARIMA(3,1,0)(2,0,1)[7] with drift         : Inf
##  ARIMA(3,1,0)(1,0,1)[7] with drift         : Inf
##  ARIMA(4,1,0)(2,0,0)[7] with drift         : 917.5239
##  ARIMA(3,1,1)(2,0,0)[7] with drift         : Inf
##  ARIMA(2,1,1)(2,0,0)[7] with drift         : Inf
##  ARIMA(4,1,1)(2,0,0)[7] with drift         : Inf
##  ARIMA(3,1,0)(2,0,0)[7]                    : 914.331
##  ARIMA(3,1,0)(1,0,0)[7]                    : 915.1805
##  ARIMA(3,1,0)(2,0,1)[7]                    : Inf
##  ARIMA(3,1,0)(1,0,1)[7]                    : Inf
##  ARIMA(2,1,0)(2,0,0)[7]                    : 916.92
##  ARIMA(4,1,0)(2,0,0)[7]                    : 915.6473
##  ARIMA(3,1,1)(2,0,0)[7]                    : 904.9635
##  ARIMA(3,1,1)(1,0,0)[7]                    : 905.6452
##  ARIMA(3,1,1)(2,0,1)[7]                    : Inf
##  ARIMA(3,1,1)(1,0,1)[7]                    : Inf
##  ARIMA(2,1,1)(2,0,0)[7]                    : 902.9777
##  ARIMA(2,1,1)(1,0,0)[7]                    : 903.7057
##  ARIMA(2,1,1)(2,0,1)[7]                    : Inf
##  ARIMA(2,1,1)(1,0,1)[7]                    : Inf
##  ARIMA(1,1,1)(2,0,0)[7]                    : 901.8925
##  ARIMA(1,1,1)(1,0,0)[7]                    : 903.5176
##  ARIMA(1,1,1)(2,0,1)[7]                    : 903.8378
##  ARIMA(1,1,1)(1,0,1)[7]                    : Inf
##  ARIMA(0,1,1)(2,0,0)[7]                    : 900.268
##  ARIMA(0,1,1)(1,0,0)[7]                    : 902.0849
##  ARIMA(0,1,1)(2,0,1)[7]                    : Inf
##  ARIMA(0,1,1)(1,0,1)[7]                    : Inf
##  ARIMA(0,1,0)(2,0,0)[7]                    : 944.9147
##  ARIMA(0,1,2)(2,0,0)[7]                    : 901.9605
##  ARIMA(1,1,0)(2,0,0)[7]                    : 920.4217
##  ARIMA(1,1,2)(2,0,0)[7]                    : Inf
##  ARIMA(0,1,1)(2,0,0)[7] with drift         : Inf
## 
##  Best model: ARIMA(0,1,1)(2,0,0)[7]
## Series: store_12631_train 
## ARIMA(0,1,1)(2,0,0)[7] 
## 
## Coefficients:
##           ma1    sar1    sar2
##       -0.9193  0.2820  0.2264
## s.e.   0.0504  0.1069  0.1135
## 
## sigma^2 = 1665:  log likelihood = -446.13
## AIC=900.27   AICc=900.76   BIC=910.13

The models with the lowest AIC are:

  • Model 1a: ARIMA(0,1,1)(2,0,0)[7] (AIC = 900.3)
  • Model 2a: ARIMA(0,1,2)(2,0,0)[7] (AIC = 901.96)
  auto.arima(store_12631_train, trace = TRUE, ic = 'bic', d=1)
## 
##  ARIMA(2,1,2)(1,0,1)[7] with drift         : Inf
##  ARIMA(0,1,0)           with drift         : 959.0483
##  ARIMA(1,1,0)(1,0,0)[7] with drift         : 931.1455
##  ARIMA(0,1,1)(0,0,1)[7] with drift         : Inf
##  ARIMA(0,1,0)                              : 954.5824
##  ARIMA(1,1,0)           with drift         : 943.2993
##  ARIMA(1,1,0)(2,0,0)[7] with drift         : 934.7441
##  ARIMA(1,1,0)(1,0,1)[7] with drift         : Inf
##  ARIMA(1,1,0)(0,0,1)[7] with drift         : 934.6635
##  ARIMA(1,1,0)(2,0,1)[7] with drift         : Inf
##  ARIMA(0,1,0)(1,0,0)[7] with drift         : 954.4037
##  ARIMA(2,1,0)(1,0,0)[7] with drift         : 931.0696
##  ARIMA(2,1,0)           with drift         : 943.0165
##  ARIMA(2,1,0)(2,0,0)[7] with drift         : 933.6792
##  ARIMA(2,1,0)(1,0,1)[7] with drift         : Inf
##  ARIMA(2,1,0)(0,0,1)[7] with drift         : 935.5766
##  ARIMA(2,1,0)(2,0,1)[7] with drift         : Inf
##  ARIMA(3,1,0)(1,0,0)[7] with drift         : 931.8932
##  ARIMA(2,1,1)(1,0,0)[7] with drift         : Inf
##  ARIMA(1,1,1)(1,0,0)[7] with drift         : Inf
##  ARIMA(3,1,1)(1,0,0)[7] with drift         : Inf
##  ARIMA(2,1,0)(1,0,0)[7]                    : 926.6356
##  ARIMA(2,1,0)                              : 938.5764
##  ARIMA(2,1,0)(2,0,0)[7]                    : 929.2495
##  ARIMA(2,1,0)(1,0,1)[7]                    : Inf
##  ARIMA(2,1,0)(0,0,1)[7]                    : 931.1363
##  ARIMA(2,1,0)(2,0,1)[7]                    : Inf
##  ARIMA(1,1,0)(1,0,0)[7]                    : 926.6866
##  ARIMA(3,1,0)(1,0,0)[7]                    : 927.51
##  ARIMA(2,1,1)(1,0,0)[7]                    : 916.0352
##  ARIMA(2,1,1)                              : 924.4556
##  ARIMA(2,1,1)(2,0,0)[7]                    : 917.7732
##  ARIMA(2,1,1)(1,0,1)[7]                    : Inf
##  ARIMA(2,1,1)(0,0,1)[7]                    : 920.1182
##  ARIMA(2,1,1)(2,0,1)[7]                    : Inf
##  ARIMA(1,1,1)(1,0,0)[7]                    : 913.3812
##  ARIMA(1,1,1)                              : 920.0001
##  ARIMA(1,1,1)(2,0,0)[7]                    : 914.2221
##  ARIMA(1,1,1)(1,0,1)[7]                    : Inf
##  ARIMA(1,1,1)(0,0,1)[7]                    : 916.7736
##  ARIMA(1,1,1)(2,0,1)[7]                    : 918.6332
##  ARIMA(0,1,1)(1,0,0)[7]                    : 909.4826
##  ARIMA(0,1,1)                              : 916.0889
##  ARIMA(0,1,1)(2,0,0)[7]                    : 910.1317
##  ARIMA(0,1,1)(1,0,1)[7]                    : Inf
##  ARIMA(0,1,1)(0,0,1)[7]                    : 912.9707
##  ARIMA(0,1,1)(2,0,1)[7]                    : Inf
##  ARIMA(0,1,0)(1,0,0)[7]                    : 949.9379
##  ARIMA(0,1,2)(1,0,0)[7]                    : 913.517
##  ARIMA(1,1,2)(1,0,0)[7]                    : Inf
##  ARIMA(0,1,1)(1,0,0)[7] with drift         : Inf
## 
##  Best model: ARIMA(0,1,1)(1,0,0)[7]
## Series: store_12631_train 
## ARIMA(0,1,1)(1,0,0)[7] 
## 
## Coefficients:
##           ma1    sar1
##       -0.9113  0.3603
## s.e.   0.0507  0.1043
## 
## sigma^2 = 1734:  log likelihood = -448.04
## AIC=902.08   AICc=902.37   BIC=909.48

Thus the models with lowest BIC are:

  • Model 1b: ARIMA(0,1,1)(1,0,0)[7] (BIC = 909.5)
  • Model 2b: ARIMA(0,1,1)(2,0,0)[7] (BIC = 910.1)

Models 1a and 2b are in fact the same, so we will only compare three models:

# fitting three candidate models (model 2b is identical to model 1a, so only
# three distinct specifications need to be compared)

store_12631_arima_1a <- Arima(store_12631_train, order = c(0, 1, 1), 
                       seasonal = list(order = c(2, 0, 0), period = 7), include.drift = FALSE)
store_12631_arima_2a <- Arima(store_12631_train, order = c(0, 1, 2), 
                       seasonal = list(order = c(2, 0, 0), period = 7), include.drift = FALSE)
store_12631_arima_1b <- Arima(store_12631_train, order = c(0, 1, 1), 
                       seasonal = list(order = c(1, 0, 0), period = 7), include.drift = FALSE)

We can first investigate how well they fit and forecast the data by looking at the training and test set errors.

kable(accuracy(forecast(store_12631_arima_1a, h = 15), store_12631_test), caption='Training and Test Errors for ARIMA(0,1,1)(2,0,0)[7]')
Training and Test Errors for ARIMA(0,1,1)(2,0,0)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set 4.634409 39.86832 30.57653 -0.2885887 11.64688 0.8404137 0.0357848 NA
Test set -25.493976 48.96325 45.49852 -12.3456301 18.09813 1.2505532 -0.0188809 0.734714
kable(accuracy(forecast(store_12631_arima_2a, h = 15), store_12631_test), caption='Training and Test Errors for ARIMA(0,1,2)(2,0,0)[7]')
Training and Test Errors for ARIMA(0,1,2)(2,0,0)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set 4.826822 39.81363 30.38169 -0.2096705 11.57788 0.8350583 -0.0197309 NA
Test set -24.080054 48.17746 44.82534 -11.7836879 17.75871 1.2320504 -0.0131598 0.7247139
kable(accuracy(forecast(store_12631_arima_1b, h = 15), store_12631_test), caption='Training and Test Errors for ARIMA(0,1,1)(1,0,0)[7]')
Training and Test Errors for ARIMA(0,1,1)(1,0,0)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set 5.442351 40.92991 31.37964 -0.0815231 11.88737 0.8624875 0.0455518 NA
Test set -29.896419 56.45868 51.86557 -14.5189451 20.65516 1.4255553 0.0722019 0.8470891

The ARIMA(0,1,1)(2,0,0)[7] and ARIMA(0,1,2)(2,0,0)[7] models have the best overall fit. However, ARIMA(0,1,2)(2,0,0)[7] has a slightly better MAE value, so it will be used to forecast the lettuce demand for the next two weeks. We now carry out the following residual tests:

autoplot(store_12631_arima_2a$residuals) + ggtitle('ARIMA(0,1,2)(2,0,0)[7] Residuals') + xlab('Week') + ylab('Residuals')

The plot above shows that the residuals have a mean 0 and a more or less constant variance. As this is a very small dataset, we cannot expect the variance to be completely constant across all residuals. The ACF plot of the residuals is also checked below:

ggAcf(store_12631_arima_2a$residuals) + ggtitle('ACF plot for ARIMA(0,1,2)(2,0,0)[7] Residuals')

All acf values are within the critical range, therefore we can conclude that we have a satisfactory model.

Finally, the Ljung-Box test is performed below to check whether the residuals are iid:

Box.test(store_12631_arima_2a$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  store_12631_arima_2a$residuals
## X-squared = 0.035441, df = 1, p-value = 0.8507

Since our p-value is larger than the critical value of 0.05, we don’t have enough evidence to reject the null hypothesis and conclude that our model fits the data well and the residuals are iid.

We can now re-train the model using the entire data set and forecast the demand of lettuce for the next two weeks as shown below:

# Re-fit the chosen ARIMA(0,1,2)(2,0,0)[7] on the full series and
# forecast lettuce demand for the next 14 days.
store_12631_arima_final <- Arima(store_12631_ts, order = c(0, 1, 2), 
                       seasonal = list(order = c(2, 0, 0), period = 7), include.drift = FALSE)
store_12631_arima_fc <- forecast(store_12631_arima_final, h = 14)

The below time series plot shows the 14-day forecast. The shaded area represents the 80% and 95% prediction interval.

plot(store_12631_arima_fc)


Store 20974

We first look at the time series plot to see if there is a clear trend:

autoplot(store_20974_train) + ggtitle('Daily Lettuce Demand for Store 20974', subtitle = 'Training Set') + xlab('Week') + ylab('Lettuce Quantity (ounces)')

As we can see, there is no visible trend across the time series for store 20974. This can also be confirmed by calling R’s ndiffs() function, which gives the number of differences required in order to stationarize a time series.

ndiffs(store_20974_train)
## [1] 0

Since there are 0 differences required, we get confirmation that the series is stationary in terms of trend. Checking the nsdiffs function and also the plot above confirms that there are not any strong seasonality patterns:

nsdiffs(store_20974_train)
## [1] 0

We can also ensure that the time series is stationary using the following stationarity tests.

adf.test(store_20974_train) 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  store_20974_train
## Dickey-Fuller = -3.6702, Lag order = 4, p-value = 0.03328
## alternative hypothesis: stationary
pp.test(store_20974_train)
## 
##  Phillips-Perron Unit Root Test
## 
## data:  store_20974_train
## Dickey-Fuller Z(alpha) = -49.069, Truncation lag parameter = 3, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(store_20974_train) 
## 
##  KPSS Test for Level Stationarity
## 
## data:  store_20974_train
## KPSS Level = 0.24821, Truncation lag parameter = 3, p-value = 0.1

In both the adf and pp test we reject the null hypothesis and conclude that the time series does not have unit roots and is thus stationary. In addition, we fail to reject the null hypothesis in the kpss test which is further confirmation that the series is stationary.

Now that the time series is stationary, we can start building the ARIMA MODEL by first looking at the ACF and PACF plots.

# ACF and PACF of the (already stationary) series (40 lags), stacked
# vertically, used to pick candidate AR/MA orders.
grid.arrange(ggAcf(store_20974_train , lag.max = 40, main = ''), 
                  ggPacf(store_20974_train , lag.max = 40, main = ''), 
                  ncol = 1, nrow = 2, 
                  top = 'STORE 20974') 

In the ACF plot above we can see that there are spikes on the first and 7th lag that geometrically decay. In addition, the PACF plot cuts off after lag 1 and also has a spike at lag 7. Therefore, we should add an autoregressive term with maximum order of 1 and also include a seasonal AR component. Since there are only significant spikes at lag 7 (and not 14, or 21 etc) the order of the seasonal AR component should also be 1.

The auto.arima function will now be used to obtain and compare four candidate models.

auto.arima(store_20974_train, trace = TRUE, ic = 'aic')
## 
##  ARIMA(2,0,2)(1,0,1)[7] with non-zero mean : Inf
##  ARIMA(0,0,0)           with non-zero mean : 813.0015
##  ARIMA(1,0,0)(1,0,0)[7] with non-zero mean : 800.0117
##  ARIMA(0,0,1)(0,0,1)[7] with non-zero mean : 803.1858
##  ARIMA(0,0,0)           with zero mean     : 1027.574
##  ARIMA(1,0,0)           with non-zero mean : 807.0829
##  ARIMA(1,0,0)(2,0,0)[7] with non-zero mean : 801.0644
##  ARIMA(1,0,0)(1,0,1)[7] with non-zero mean : Inf
##  ARIMA(1,0,0)(0,0,1)[7] with non-zero mean : 802.1917
##  ARIMA(1,0,0)(2,0,1)[7] with non-zero mean : Inf
##  ARIMA(0,0,0)(1,0,0)[7] with non-zero mean : 802.8533
##  ARIMA(2,0,0)(1,0,0)[7] with non-zero mean : 801.642
##  ARIMA(1,0,1)(1,0,0)[7] with non-zero mean : 801.7487
##  ARIMA(0,0,1)(1,0,0)[7] with non-zero mean : 800.8511
##  ARIMA(2,0,1)(1,0,0)[7] with non-zero mean : Inf
##  ARIMA(1,0,0)(1,0,0)[7] with zero mean     : 839.4687
## 
##  Best model: ARIMA(1,0,0)(1,0,0)[7] with non-zero mean
## Series: store_20974_train 
## ARIMA(1,0,0)(1,0,0)[7] with non-zero mean 
## 
## Coefficients:
##          ar1    sar1      mean
##       0.2546  0.3481  216.3048
## s.e.  0.1136  0.1108   10.7150
## 
## sigma^2 = 2322:  log likelihood = -396.01
## AIC=800.01   AICc=800.58   BIC=809.28

Thus, the models with the lowest AIC are:

  • Model 1a: ARIMA(1,0,0)(1,0,0)[7] (AIC = 800.01)
  • Model 2a: ARIMA(1,0,0)(2,0,0)[7] (AIC = 801.06)
auto.arima(store_20974_train, trace = TRUE, ic = 'bic')
## 
##  ARIMA(2,0,2)(1,0,1)[7] with non-zero mean : Inf
##  ARIMA(0,0,0)           with non-zero mean : 817.6365
##  ARIMA(1,0,0)(1,0,0)[7] with non-zero mean : 809.2817
##  ARIMA(0,0,1)(0,0,1)[7] with non-zero mean : 812.4558
##  ARIMA(0,0,0)           with zero mean     : 1029.891
##  ARIMA(1,0,0)           with non-zero mean : 814.0354
##  ARIMA(1,0,0)(2,0,0)[7] with non-zero mean : 812.6519
##  ARIMA(1,0,0)(1,0,1)[7] with non-zero mean : Inf
##  ARIMA(1,0,0)(0,0,1)[7] with non-zero mean : 811.4616
##  ARIMA(1,0,0)(2,0,1)[7] with non-zero mean : Inf
##  ARIMA(0,0,0)(1,0,0)[7] with non-zero mean : 809.8058
##  ARIMA(2,0,0)(1,0,0)[7] with non-zero mean : 813.2294
##  ARIMA(1,0,1)(1,0,0)[7] with non-zero mean : 813.3362
##  ARIMA(0,0,1)(1,0,0)[7] with non-zero mean : 810.1211
##  ARIMA(2,0,1)(1,0,0)[7] with non-zero mean : Inf
##  ARIMA(1,0,0)(1,0,0)[7] with zero mean     : 846.4211
## 
##  Best model: ARIMA(1,0,0)(1,0,0)[7] with non-zero mean
## Series: store_20974_train 
## ARIMA(1,0,0)(1,0,0)[7] with non-zero mean 
## 
## Coefficients:
##          ar1    sar1      mean
##       0.2546  0.3481  216.3048
## s.e.  0.1136  0.1108   10.7150
## 
## sigma^2 = 2322:  log likelihood = -396.01
## AIC=800.01   AICc=800.58   BIC=809.28

Thus the models with lowest BIC are:

  • Model 1b: ARIMA(1,0,0)(1,0,0)[7] (BIC = 809.28)
  • Model 2b: ARIMA(0,0,0)(1,0,0)[7] (BIC = 809.8)

Models 1a and 1b are in fact the same, so we will only compare three models:

# fitting three candidate models (model 1b is identical to model 1a, so only
# three distinct specifications need to be compared)

store_20974_arima_1a <- Arima(store_20974_train, order = c(1, 0, 0), 
                       seasonal = list(order = c(1, 0, 0), period = 7), include.drift = FALSE)
store_20974_arima_2a <- Arima(store_20974_train, order = c(1, 0, 0), 
                       seasonal = list(order = c(2, 0, 0), period = 7), include.drift = FALSE)
store_20974_arima_2b <- Arima(store_20974_train, order = c(0, 0, 0), 
                       seasonal = list(order = c(1, 0, 0), period = 7), include.drift = FALSE)

We can first investigate how well they fit and forecast the data by looking at the training and test set errors.

kable(accuracy(forecast(store_20974_arima_1a, h = 13), store_20974_test), caption='Training and Test Errors for ARIMA(1,0,0)(1,0,0)[7]')
Training and Test Errors for ARIMA(1,0,0)(1,0,0)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set 1.24246 47.21273 37.49184 -17.521823 32.14153 0.8024693 -0.0188453 NA
Test set 10.48480 54.36894 33.92973 0.191571 13.78476 0.7262266 -0.1280506 0.7466159
kable(accuracy(forecast(store_20974_arima_2a, h = 13), store_20974_test), caption='Training and Test Errors for ARIMA(1,0,0)(2,0,0)[7]')
Training and Test Errors for ARIMA(1,0,0)(2,0,0)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set 1.533626 46.84013 37.71961 -16.607178 31.45836 0.8073445 -0.0195521 NA
Test set 13.914109 53.39686 33.22993 2.132582 13.22273 0.7112481 -0.2002685 0.7332362
kable(accuracy(forecast(store_20974_arima_2b, h = 13), store_20974_test), caption='Training and Test Errors for ARIMA(0,0,0)(1,0,0)[7]')
Training and Test Errors for ARIMA(0,0,0)(1,0,0)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set 1.659757 48.68164 38.45370 -17.1064141 32.17345 0.8230567 0.2404847 NA
Test set 11.248799 53.96458 33.39448 0.6664744 13.47798 0.7147701 -0.1542946 0.7414021

All models have a very similar performance. However, ARIMA(1,0,0)(2,0,0)[7] has a slightly better RMSE value, so it will be used to forecast the lettuce demand for the next two weeks. We now carry out the following residual tests:

autoplot(store_20974_arima_2a$residuals) + ggtitle('ARIMA(1,0,0)(2,0,0)[7] Residuals') + xlab('Week') + ylab('Residuals')

As warned by the auto.arima output, the residuals don’t have a mean of exactly zero. This does not necessarily mean that this is a bad model. Perhaps the removal of the first few observations, and thus the smaller training set, have not allowed us to train the model as well. The ACF plot of the residuals is also checked below:

ggAcf(store_20974_arima_2a$residuals) + ggtitle('ACF plot for ARIMA(1,0,0)(2,0,0)[7] Residuals')

All ACF values are within the critical range, therefore we can conclude that we have a satisfactory model.

Finally, the Ljung-Box test is performed below to check whether the residuals are iid:

Box.test(store_20974_arima_2a$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  store_20974_arima_2a$residuals
## X-squared = 0.029834, df = 1, p-value = 0.8629

Since our p-value is larger than the critical value of 0.05, we don’t have enough evidence to reject the null hypothesis and conclude that our model fits the data well and the residuals are iid.

We can now re-train the model using the entire data set and forecast the demand of lettuce for the next two weeks as shown below:

# Re-fit the chosen ARIMA(1,0,0)(2,0,0)[7] on the full series and
# forecast lettuce demand for the next 14 days.
store_20974_arima_final <- Arima(store_20974_ts, order = c(1, 0, 0), 
                       seasonal = list(order = c(2, 0, 0), period = 7), include.drift = FALSE)
store_20974_arima_fc <- forecast(store_20974_arima_final, h = 14)

The below time series plot shows the 14-day forecast. The shaded area represents the 80% and 95% prediction interval.

# 14-day forecast plot; shaded bands are the 80% and 95% prediction intervals
plot(store_20974_arima_fc)


Store 46673

We first look at the time series plot to see if there is a clear trend:

# Visual check for trend/seasonality in the store 46673 training series
autoplot(store_46673_train) +
  labs(title = 'Daily Lettuce Demand for Store 46673',
       subtitle = 'Training Set',
       x = 'Week', y = 'Lettuce Quantity (ounces)')

As we can see, there does not seem to be a visible trend across the time series for store 46673. This can also be confirmed by calling R’s ndiffs() function, which gives the number of differences required in order to stationarize a time series.

# Number of ordinary (non-seasonal) differences needed for stationarity
ndiffs(store_46673_train)
## [1] 0

However, from the plot above as well as the nsdiffs output below, we can see that the times series needs to be de-seasonalised by taking one seasonal difference:

# Number of seasonal differences needed to remove the weekly seasonality
nsdiffs(store_46673_train)
## [1] 1

Therefore, we only take a single seasonal (lag-7) difference of the series:

# Take one seasonal (lag-7) difference to remove the weekly pattern
store_46673_train_sdiff <- diff(store_46673_train, lag = 7, differences = 1)
autoplot(store_46673_train_sdiff) +
  labs(title = 'Store 46673 - Seasonally Differenced Time Series',
       x = 'Week', y = 'Differenced Lettuce Quantity')

We can now ensure that the time series is stationary using the following stationarity tests.

# Augmented Dickey-Fuller test: H0 = unit root (non-stationary)
adf.test(store_46673_train_sdiff) 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  store_46673_train_sdiff
## Dickey-Fuller = -3.9871, Lag order = 4, p-value = 0.01446
## alternative hypothesis: stationary
# Phillips-Perron test: H0 = unit root (non-stationary)
pp.test(store_46673_train_sdiff)
## 
##  Phillips-Perron Unit Root Test
## 
## data:  store_46673_train_sdiff
## Dickey-Fuller Z(alpha) = -76.596, Truncation lag parameter = 3, p-value
## = 0.01
## alternative hypothesis: stationary
# KPSS test: H0 = level stationarity (note: null is reversed vs ADF/PP)
kpss.test(store_46673_train_sdiff) 
## 
##  KPSS Test for Level Stationarity
## 
## data:  store_46673_train_sdiff
## KPSS Level = 0.12099, Truncation lag parameter = 3, p-value = 0.1

In both the adf and pp test we reject the null hypothesis and conclude that the time series does not have unit roots and is thus stationary. In addition, we fail to reject the null hypothesis in the kpss test which is further confirmation that the series is stationary.

Now that the time series is stationary, we can start building the ARIMA MODEL by first looking at the ACF and PACF plots.

# ACF and PACF of the seasonally differenced series, stacked vertically
grid.arrange(
  ggAcf(store_46673_train_sdiff, lag.max = 40, main = ''),
  ggPacf(store_46673_train_sdiff, lag.max = 40, main = ''),
  nrow = 2, ncol = 1,
  top = 'STORE 46673'
)

From the plots above, we can see that the ACF has a significant spike at lag 7, while the PACF decays exponentially at multiples of lag 7. Therefore, we should include a seasonal MA component in our ARIMA model.

The auto.arima function will now be used to obtain and compare four candidate models.

# Stepwise model search minimising AIC, forcing one seasonal difference (D = 1)
auto.arima(store_46673_train, trace = TRUE, ic = 'aic', D=1)
## 
##  ARIMA(2,0,2)(1,1,1)[7] with drift         : 774.7489
##  ARIMA(0,0,0)(0,1,0)[7] with drift         : 794.992
##  ARIMA(1,0,0)(1,1,0)[7] with drift         : 774.9194
##  ARIMA(0,0,1)(0,1,1)[7] with drift         : 767.0175
##  ARIMA(0,0,0)(0,1,0)[7]                    : 792.9944
##  ARIMA(0,0,1)(0,1,0)[7] with drift         : 796.9891
##  ARIMA(0,0,1)(1,1,1)[7] with drift         : 768.9765
##  ARIMA(0,0,1)(0,1,2)[7] with drift         : 768.9835
##  ARIMA(0,0,1)(1,1,0)[7] with drift         : 774.8367
##  ARIMA(0,0,1)(1,1,2)[7] with drift         : Inf
##  ARIMA(0,0,0)(0,1,1)[7] with drift         : 767.3548
##  ARIMA(1,0,1)(0,1,1)[7] with drift         : 769.017
##  ARIMA(0,0,2)(0,1,1)[7] with drift         : 769.017
##  ARIMA(1,0,0)(0,1,1)[7] with drift         : 767.0954
##  ARIMA(1,0,2)(0,1,1)[7] with drift         : Inf
##  ARIMA(0,0,1)(0,1,1)[7]                    : 765.5681
##  ARIMA(0,0,1)(0,1,0)[7]                    : 794.9915
##  ARIMA(0,0,1)(1,1,1)[7]                    : 767.4273
##  ARIMA(0,0,1)(0,1,2)[7]                    : 767.4459
##  ARIMA(0,0,1)(1,1,0)[7]                    : 772.8551
##  ARIMA(0,0,1)(1,1,2)[7]                    : Inf
##  ARIMA(0,0,0)(0,1,1)[7]                    : 765.9212
##  ARIMA(1,0,1)(0,1,1)[7]                    : 767.5676
##  ARIMA(0,0,2)(0,1,1)[7]                    : 767.5675
##  ARIMA(1,0,0)(0,1,1)[7]                    : 765.6205
##  ARIMA(1,0,2)(0,1,1)[7]                    : Inf
## 
##  Best model: ARIMA(0,0,1)(0,1,1)[7]
## Series: store_46673_train 
## ARIMA(0,0,1)(0,1,1)[7] 
## 
## Coefficients:
##          ma1     sma1
##       0.1770  -0.7328
## s.e.  0.1127   0.1183
## 
## sigma^2 = 663.5:  log likelihood = -379.78
## AIC=765.57   AICc=765.88   BIC=772.75

Thus, the models with the lowest AIC are:

  • Model 1a: ARIMA(0,0,1)(0,1,1)[7] (AIC = 765.6)
  • Model 2a: ARIMA(1,0,0)(0,1,1)[7] (AIC = 765.6)
# Repeat the search minimising BIC, again forcing one seasonal difference
auto.arima(store_46673_train, trace = TRUE, ic = 'bic', D=1)
## 
##  ARIMA(2,0,2)(1,1,1)[7] with drift         : 793.9045
##  ARIMA(0,0,0)(0,1,0)[7] with drift         : 799.7809
##  ARIMA(1,0,0)(1,1,0)[7] with drift         : 784.4972
##  ARIMA(0,0,1)(0,1,1)[7] with drift         : 776.5953
##  ARIMA(0,0,0)(0,1,0)[7]                    : 795.3888
##  ARIMA(0,0,1)(0,1,0)[7] with drift         : 804.1724
##  ARIMA(0,0,1)(1,1,1)[7] with drift         : 780.9487
##  ARIMA(0,0,1)(0,1,2)[7] with drift         : 780.9557
##  ARIMA(0,0,1)(1,1,0)[7] with drift         : 784.4145
##  ARIMA(0,0,1)(1,1,2)[7] with drift         : Inf
##  ARIMA(0,0,0)(0,1,1)[7] with drift         : 774.5381
##  ARIMA(0,0,0)(1,1,1)[7] with drift         : 778.8883
##  ARIMA(0,0,0)(0,1,2)[7] with drift         : 778.9041
##  ARIMA(0,0,0)(1,1,0)[7] with drift         : 780.845
##  ARIMA(0,0,0)(1,1,2)[7] with drift         : Inf
##  ARIMA(1,0,0)(0,1,1)[7] with drift         : 776.6732
##  ARIMA(1,0,1)(0,1,1)[7] with drift         : 780.9892
##  ARIMA(0,0,0)(0,1,1)[7]                    : 770.7101
##  ARIMA(0,0,0)(1,1,1)[7]                    : 775.0978
##  ARIMA(0,0,0)(0,1,2)[7]                    : 775.1
##  ARIMA(0,0,0)(1,1,0)[7]                    : 776.4768
##  ARIMA(0,0,0)(1,1,2)[7]                    : Inf
##  ARIMA(1,0,0)(0,1,1)[7]                    : 772.8039
##  ARIMA(0,0,1)(0,1,1)[7]                    : 772.7514
##  ARIMA(1,0,1)(0,1,1)[7]                    : 777.1454
## 
##  Best model: ARIMA(0,0,0)(0,1,1)[7]
## Series: store_46673_train 
## ARIMA(0,0,0)(0,1,1)[7] 
## 
## Coefficients:
##          sma1
##       -0.6820
## s.e.   0.1209
## 
## sigma^2 = 683.3:  log likelihood = -380.96
## AIC=765.92   AICc=766.08   BIC=770.71

Thus the models with lowest BIC are:

  • Model 1b: ARIMA(0,0,0)(0,1,1)[7] (BIC = 770.7)
  • Model 2b: ARIMA(0,0,1)(0,1,1)[7] (BIC = 772.8)

Models 1a and 2b are in fact the same, so we will only compare three models:

# fitting the three candidate models (models 1a and 2b are identical)

# Model 1a (= 2b): ARIMA(0,0,1)(0,1,1)[7]
store_46673_arima_1a <- Arima(store_46673_train, order = c(0, 0, 1), 
                       seasonal = list(order = c(0, 1, 1), period = 7), include.drift = FALSE)
# Model 2a: ARIMA(1,0,0)(0,1,1)[7]
store_46673_arima_2a <- Arima(store_46673_train, order = c(1, 0, 0), 
                       seasonal = list(order = c(0, 1, 1), period = 7), include.drift = FALSE)
# Model 1b: ARIMA(0,0,0)(0,1,1)[7]
store_46673_arima_1b <- Arima(store_46673_train, order = c(0, 0, 0), 
                       seasonal = list(order = c(0, 1, 1), period = 7), include.drift = FALSE)

We can first investigate how well they fit and forecast the data by looking at the training and test set errors.

# Training vs test accuracy of model 1a over the 15-day test horizon
kable(accuracy(forecast(store_46673_arima_1a, h = 15), store_46673_test), caption='Training and Test Errors for ARIMA(0,0,1)(0,1,1)[7]')
Training and Test Errors for ARIMA(0,0,1)(0,1,1)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -0.3988486 24.40666 17.82157 -2.990583 13.47286 0.6815614 0.0047168 NA
Test set 11.8408180 42.79184 32.31374 4.914431 20.21503 1.2357946 0.1579477 0.6732954
# Training vs test accuracy of model 2a over the 15-day test horizon
kable(accuracy(forecast(store_46673_arima_2a, h = 15), store_46673_test), caption='Training and Test Errors for ARIMA(1,0,0)(0,1,1)[7]')
Training and Test Errors for ARIMA(1,0,0)(0,1,1)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -0.3864516 24.40934 17.84919 -3.007139 13.47101 0.6826176 0.0092950 NA
Test set 11.8727958 42.83647 32.37826 4.923857 20.24729 1.2382621 0.1563748 0.6730237
# Training vs test accuracy of model 1b over the 15-day test horizon
kable(accuracy(forecast(store_46673_arima_1b, h = 15), store_46673_test), caption='Training and Test Errors for ARIMA(0,0,0)(0,1,1)[7]')
Training and Test Errors for ARIMA(0,0,0)(0,1,1)[7]
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -0.5323512 24.92335 18.37475 -3.217806 13.95744 0.7027171 0.1647257 NA
Test set 13.4322176 44.54181 33.66570 5.891811 21.01492 1.2874983 0.1689128 0.6947363

All models have an almost identical performance in terms of training set fit. However, ARIMA(0,0,1)(0,1,1)[7] has a slightly better RMSE value, so it will be used to forecast the lettuce demand for the next two weeks. We now carry out the following residual tests:

# Residual diagnostics: residuals of the chosen model plotted over time
autoplot(store_46673_arima_1a$residuals) +
  labs(title = 'ARIMA(0,0,1)(0,1,1)[7] Residuals', x = 'Week', y = 'Residuals')

The plot above shows that the residuals have a mean 0 and a more or less constant variance. As this is a very small dataset, we cannot expect the variance to be completely constant across all residuals. The ACF plot of the residuals is also checked below:

# Residual autocorrelation check
ggAcf(store_46673_arima_1a$residuals) +
  labs(title = 'ACF plot for ARIMA(0,0,1)(0,1,1)[7] Residuals')

All ACF values are within the critical range, therefore we can conclude that we have a satisfactory model.

Finally, the Ljung-Box test is performed below to check whether the residuals are iid:

# Ljung-Box test: H0 = residuals are independently distributed (white noise)
Box.test(store_46673_arima_1a$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  store_46673_arima_1a$residuals
## X-squared = 0.0020253, df = 1, p-value = 0.9641

Since the p-value is larger than the 0.05 significance level, we do not have enough evidence to reject the null hypothesis that the residuals are independently distributed; we therefore conclude that the model fits the data well and the residuals behave like white noise.

We can now re-train the model using the entire data set and forecast the demand of lettuce for the next two weeks as shown below:

# Re-fit the selected ARIMA(0,0,1)(0,1,1)[7] model on the full series
store_46673_arima_final <- Arima(
  store_46673_ts,
  order = c(0, 0, 1),
  seasonal = list(order = c(0, 1, 1), period = 7),
  include.drift = FALSE
)
# Forecast lettuce demand for the next 14 days (two weeks)
store_46673_arima_fc <- forecast(store_46673_arima_final, h = 14)

The below time series plot shows the 14-day forecast. The shaded area represents the 80% and 95% prediction interval.

# 14-day forecast plot; shaded bands are the 80% and 95% prediction intervals
plot(store_46673_arima_fc)


4. Performance Comparison

The following section will compare the fit and forecast performance of the two methods for each store and pick the best performing one for the official forecast. The following is a summary of the models picked, and the code to input the forecasts in an Excel file:

# Collect the 14-day point forecasts (column 1, "Point Forecast") of the
# chosen model for each store into a single table
forecasts <- data.frame(
  as.data.frame(store_4904_arima_fc)[[1]],
  as.data.frame(store_12631_ets_fc)[[1]],
  as.data.frame(store_20974_arima_fc)[[1]],
  as.data.frame(store_46673_ets_fc)[[1]]
)
names(forecasts) <- c('Store 4904', 'Store 12631', 'Store 20974', 'Store 46673')
rownames(forecasts) <- seq_len(14)

kable(forecasts)
Store 4904 Store 12631 Store 20974 Store 46673
360.4455 262.5730 225.0381 159.79583
328.7233 277.1642 236.1516 173.08458
296.3099 297.7365 274.9644 164.56924
232.0672 303.2692 197.9999 101.27350
231.4814 237.5404 207.1209 76.91392
362.6138 268.6961 198.7878 170.12068
336.8297 249.1375 234.9984 175.76910
360.4455 262.5730 219.5166 159.79583
328.7233 277.1642 228.8890 173.08458
296.3099 297.7365 257.8649 164.56924
232.0672 303.2692 205.5278 101.27350
231.4814 237.5404 213.0050 76.91392
362.6138 268.6961 205.4249 170.12068
336.8297 249.1375 229.7271 175.76910
#write.xlsx(forecasts,"~/Desktop/LSCA/01918940.xlsx")

Overall, there is an equal split in the number of times Holt-Winters and ARIMA were used for the final forecast. In our case, small forecast errors are tolerable: if we slightly over-forecast, supply will exceed demand and the restaurant can simply store some lettuce in the fridge for the next few days; if we slightly under-forecast, the restaurant can put a slightly smaller quantity of lettuce in each sandwich.

Store 4904

Firstly we plot the fitted values on top of the original time series in order to visualise which model fits the data better.

# Overlay the fitted values of both final models on the observed series
plot(store_4904_ts)
lines(fitted(store_4904_ets_final), col = "blue")
lines(fitted(store_4904_arima_final), col = "red", lty = 2)
legend(x = 23, y = 450, legend = c("HW", "ARIMA"),
       col = c("blue", "red"), lty = c(1, 2), cex = 0.9)
title('Comparing fit of ets(ANA) vs ARIMA(0,0,0)(0,1,1)[7]')

In some cases, the models fail to capture some of the spikes that are part of the data. In general both models have a very similar fit; however, the Holt-Winters model seems to be better at following the magnitude of the data, while the ARIMA model often underestimates the values. This is also clear below, where the training set RMSE is approximately 43 and 50 for the Holt-Winters and ARIMA methods respectively.

# Accuracy tables for the final ets and ARIMA candidates on the 14-day test set
store_4904_ets_df <- as.data.frame(accuracy(forecast(store_4904_ets, h = 14), store_4904_test))
store_4904_arima_df <- as.data.frame(accuracy(forecast(store_4904_arima_1bb, h = 14), store_4904_test))

# Stack the two tables and label each row by model and data split
store_4904_errors <- rbind(store_4904_ets_df, store_4904_arima_df)
rownames(store_4904_errors) <- c('Training Set Errors (ets)',
                                 'Test Set Errors (ets)',
                                 'Training Set Errors (arima)',
                                 'Test Set Errors (arima)')

kable(store_4904_errors)
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training Set Errors (ets) -0.9194867 43.04688 32.02198 -2.351651 10.999318 0.6799502 -0.0667622 NA
Test Set Errors (ets) -9.7799936 37.75467 28.39140 -3.682129 9.266691 0.6028589 0.1306179 0.4259769
Training Set Errors (arima) -2.8060686 49.98830 36.22054 -2.768926 12.340051 0.7691017 0.1673837 NA
Test Set Errors (arima) -10.4112517 34.99824 27.70366 -4.456707 9.284251 0.5882556 -0.0425864 0.4101133

When looking at the test set performance, however, the ARIMA model forecasts slightly better in terms of RMSE and MAE. We will therefore be using this model for the official forecast.

Store 12631

Firstly we plot the fitted values on top of the original time series in order to visualise which model fits the data better.

# Overlay the fitted values of both final models on the observed series
plot(store_12631_ts)
lines(fitted(store_12631_ets_final), col = "blue")
lines(fitted(store_12631_arima_final), col = "red", lty = 2)
legend(x = 23, y = 450, legend = c("HW", "ARIMA"),
       col = c("blue", "red"), lty = c(1, 2), cex = 0.9)
title('Comparing fit of ets(MNA) vs ARIMA(0,1,2)(2,0,0)[7]')

# Accuracy tables for the final ets and ARIMA candidates on the 15-day test set
store_12631_ets_df <- as.data.frame(accuracy(forecast(store_12631_ets_testb, h = 15), store_12631_test))
store_12631_arima_df <- as.data.frame(accuracy(forecast(store_12631_arima_2a, h = 15), store_12631_test))

# Stack the two tables and label each row by model and data split
store_12631_errors <- rbind(store_12631_ets_df, store_12631_arima_df)
rownames(store_12631_errors) <- c('Training Set Errors (ets)',
                                  'Test Set Errors (ets)',
                                  'Training Set Errors (arima)',
                                  'Test Set Errors (arima)')

kable(store_12631_errors)
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training Set Errors (ets) 4.068353 35.90313 27.44120 -0.1419881 10.40678 0.7542373 0.0004899 NA
Test Set Errors (ets) -20.188511 45.53447 39.35159 -10.1282790 15.50693 1.0816011 0.0272487 0.7056087
Training Set Errors (arima) 4.826822 39.81363 30.38169 -0.2096705 11.57788 0.8350583 -0.0197309 NA
Test Set Errors (arima) -24.080054 48.17746 44.82534 -11.7836879 17.75871 1.2320504 -0.0131598 0.7247139

The Holt Winters model has the best forecast performance across all metrics. Therefore, it will be used for the official forecast.

Store 20974

Firstly we plot the fitted values on top of the original time series in order to visualise which model fits the data better.

# Overlay the fitted values of both final models on the observed series
plot(store_20974_ts)
lines(fitted(store_20974_ets_final), col = "blue")
lines(fitted(store_20974_arima_final), col = "red", lty = 2)
legend(x = 23, y = 450, legend = c("HW", "ARIMA"),
       col = c("blue", "red"), lty = c(1, 2), cex = 0.9)
title('Comparing fit of ets(ANA) vs ARIMA(1,0,0)(2,0,0)[7]')

# Accuracy tables for the final ets and ARIMA candidates on the 13-day test set
store_20974_ets_df <- as.data.frame(accuracy(forecast(store_20974_ets, h = 13), store_20974_test))
store_20974_arima_df <- as.data.frame(accuracy(forecast(store_20974_arima_2a, h = 13), store_20974_test))

# Stack the two tables and label each row by model and data split
store_20974_errors <- rbind(store_20974_ets_df, store_20974_arima_df)
rownames(store_20974_errors) <- c('Training Set Errors (ets)',
                                  'Test Set Errors (ets)',
                                  'Training Set Errors (arima)',
                                  'Test Set Errors (arima)')

kable(store_20974_errors)
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training Set Errors (ets) -1.024937 40.49810 33.41008 -13.064614 25.69316 0.7151041 0.0868321 NA
Test Set Errors (ets) 25.205681 56.86240 34.53942 8.304245 13.70945 0.7392763 -0.3209396 0.8315310
Training Set Errors (arima) 1.533626 46.84013 37.71961 -16.607178 31.45836 0.8073445 -0.0195521 NA
Test Set Errors (arima) 13.914109 53.39686 33.22993 2.132582 13.22273 0.7112481 -0.2002685 0.7332362

The ARIMA model has the best forecast performance across all metrics. Therefore, it will be used for the official forecast.

Store 46673

Firstly we plot the fitted values on top of the original time series in order to visualise which model fits the data better.

# Overlay the fitted values of both final models on the observed series
plot(store_46673_ts)
lines(fitted(store_46673_ets_final), col = "blue")
lines(fitted(store_46673_arima_final), col = "red", lty = 2)
legend(x = 23, y = 450, legend = c("HW", "ARIMA"),
       col = c("blue", "red"), lty = c(1, 2), cex = 0.9)
title('Comparing fit of ets(ANA) vs ARIMA(0,0,1)(0,1,1)[7] ')

# Accuracy tables for the final ets and ARIMA candidates on the 15-day test set
store_46673_ets_df <- as.data.frame(accuracy(forecast(store_46673_ets, h = 15), store_46673_test))
store_46673_arima_df <- as.data.frame(accuracy(forecast(store_46673_arima_1a, h = 15), store_46673_test))

# Stack the two tables and label each row by model and data split
store_46673_errors <- rbind(store_46673_ets_df, store_46673_arima_df)
rownames(store_46673_errors) <- c('Training Set Errors (ets)',
                                  'Test Set Errors (ets)',
                                  'Training Set Errors (arima)',
                                  'Test Set Errors (arima)')

kable(store_46673_errors)
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training Set Errors (ets) -1.1006892 23.02907 18.44768 -3.895680 14.36341 0.7055063 0.0856638 NA
Test Set Errors (ets) 15.8203916 38.73735 28.60888 8.421665 18.29080 1.0941074 0.0573268 0.6199927
Training Set Errors (arima) -0.3988486 24.40666 17.82157 -2.990583 13.47286 0.6815614 0.0047168 NA
Test Set Errors (arima) 11.8408180 42.79184 32.31374 4.914431 20.21503 1.2357946 0.1579477 0.6732954

The Holt-Winters model has the better forecast performance on most metrics, including RMSE, MAE and MAPE. Therefore, it will be used for the official forecast.